In [1]:
#library
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
import seaborn as sn
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
import plotly.graph_objects as go
In [2]:
#dataset
data_load = pd.read_csv('GlaucomaM.csv')
In [3]:
data_load.head()
Out[3]:
ag at as an ai eag eat eas ean eai ... tmt tms tmn tmi mr rnf mdic emd mv Class
0 2.220 0.354 0.580 0.686 0.601 1.267 0.336 0.346 0.255 0.331 ... -0.018 -0.230 -0.510 -0.158 0.841 0.410 0.137 0.239 0.035 normal
1 2.681 0.475 0.672 0.868 0.667 2.053 0.440 0.520 0.639 0.454 ... -0.014 -0.165 -0.317 -0.192 0.924 0.256 0.252 0.329 0.022 normal
2 1.979 0.343 0.508 0.624 0.504 1.200 0.299 0.396 0.259 0.246 ... -0.097 -0.235 -0.337 -0.020 0.795 0.378 0.152 0.250 0.029 normal
3 1.747 0.269 0.476 0.525 0.476 0.612 0.147 0.017 0.044 0.405 ... -0.035 -0.449 -0.217 -0.091 0.746 0.200 0.027 0.078 0.023 normal
4 2.990 0.599 0.686 1.039 0.667 2.513 0.543 0.607 0.871 0.492 ... -0.105 0.084 -0.012 -0.054 0.977 0.193 0.297 0.354 0.034 normal

5 rows × 63 columns

In [4]:
data_load.isnull().sum()
Out[4]:
ag       0
at       0
as       0
an       0
ai       0
        ..
rnf      0
mdic     0
emd      0
mv       0
Class    0
Length: 63, dtype: int64
In [5]:
le = LabelEncoder()
In [6]:
data_load.Class = le.fit_transform(data_load.Class)
In [7]:
data_load['Class']
Out[7]:
0      1
1      1
2      1
3      1
4      1
      ..
191    0
192    0
193    0
194    0
195    0
Name: Class, Length: 196, dtype: int64
In [8]:
model_params = {
    'svm': {
        'model': svm.SVC(gamma='auto'),
        'params' : {
            'C': [1,10,20],
            'kernel': ['rbf','linear']
        }  
    },
    'random_forest': {
        'model': RandomForestClassifier(),
        'params' : {
            'n_estimators': [1,5,10]
        }
    },
    'logistic_regression' : {
        'model': LogisticRegression(solver='liblinear',multi_class='auto'),
        'params': {
            'C': [1,5,10]
        }
    }
}   
In [9]:
pd.DataFrame(model_params)
Out[9]:
svm random_forest logistic_regression
model SVC(gamma='auto') RandomForestClassifier() LogisticRegression(solver='liblinear')
params {'C': [1, 10, 20], 'kernel': ['rbf', 'linear']} {'n_estimators': [1, 5, 10]} {'C': [1, 5, 10]}
In [10]:
scores = []

for model_name, mp in model_params.items():
    clf =  GridSearchCV(mp['model'], mp['params'], cv=3, return_train_score=False)
    clf.fit(data_load.drop('Class',axis='columns'), data_load.Class)
    scores.append({
        'model': model_name,
        'best_score': clf.best_score_,
        'best_params': clf.best_params_
    })
    
df = pd.DataFrame(scores,columns=['model','best_score','best_params'])
df
Out[10]:
model best_score best_params
0 svm 0.882517 {'C': 1, 'kernel': 'linear'}
1 random_forest 0.836519 {'n_estimators': 10}
2 logistic_regression 0.851904 {'C': 1}
In [11]:
from sklearn.model_selection import train_test_split
In [12]:
X = data_load.drop('Class', axis='columns')
y = data_load.Class
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1,random_state=0)
In [13]:
model = SVC(C=1.5,kernel='linear', random_state=42)
In [14]:
model.fit(X_train, y_train)
Out[14]:
SVC(C=1.5, kernel='linear', random_state=42)
In [15]:
model.score(X_test, y_test)
Out[15]:
0.95
In [16]:
classes1 = {
    0:'Normal',
    1:'Gulcoma',
}
In [17]:
y_predicted = model.predict(X_test)
y_predicted
Out[17]:
array([1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0])
In [18]:
classes1[y_predicted[3]]
Out[18]:
'Normal'
In [19]:
cm = confusion_matrix(y_test, y_predicted)
cm
Out[19]:
array([[10,  1],
       [ 0,  9]])
In [20]:
fig = go.Figure(data=go.Heatmap(
                   z=cm,
                   x=['Normal','Glucoma'],
                   y=['Normal','Glucoma'],
                   hoverongaps = False))
fig.show()
In [21]:
import matplotlib.pyplot as plt
plt.bar(df['model'], df['best_score'])

plt.ylabel('Best Score')

plt.xlabel('Algorithms')


plt.show()
In [ ]: